County Figures
theme_c <- function(...){
# font <- "Helvetica" #assign font family up front
theme_bw() %+replace% #replace elements we want to change
theme(
#text elements
plot.title = element_text( #title
size = 16, #set font size
face = 'bold', #bold typeface
hjust = .5,
vjust = 3),
plot.subtitle = element_text( #subtitle
size = 12,
hjust = .5,
face = 'italic',
vjust = 3), #font size
axis.title = element_text( #axis titles
size = 12), #font size
axis.text = element_text( #axis text
size = 9),
legend.text = element_text(size = 12),
# t, r, b, l
plot.margin = unit(c(1,.5,.5,.5), "cm"),
legend.position = "right",
strip.text.x = element_text(size = 11, face = "bold", color="white"),
strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
strip.background = element_rect(fill = "#3E3D3D")
) %+replace%
theme(...)
}targets_dir <- here("_targets")
county <- tar_read(ma_v1,
store =targets_dir) %>%
mutate(version = "v1") %>%
bind_rows(
tar_read(ma_v2,
store =targets_dir)
) %>%
bind_rows(
tar_read(ma_v3,
store =targets_dir)
) %>%
bind_rows(
tar_read(ma_v4,
store =targets_dir)
)%>%
bind_rows(
tar_read(ma_v5,
store =targets_dir)
)%>%
bind_rows(
tar_read(ma_v6,
store =targets_dir)
)%>%
bind_rows(
tar_read(ma_v7,
store =targets_dir)
)
waste <- tar_read(waste, store =targets_dir)
county_names <- read_tsv(here("data/demographic/county_fips.tsv")) %>%
rename_with(.cols = everything(), .fn =tolower) %>%
bind_rows()
covidestim <- tar_read(covidestim_biweekly_county,
store =targets_dir) %>%
select(-c(date)) %>%
distinct()
dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
if(filter_versions) {
county <- county %>%
filter(version %in% c("v3", "v5", "v6", "v7"))
}wjoined <- county %>%
# set Nantucket, Duke fips to Nantucket since we have wastewater data
# for Nantucket
mutate(fips = ifelse(grepl("25019", fips), "25019", fips)) %>%
inner_join(waste) %>%
left_join(covidestim, relationship = "many-to-many") %>%
left_join(dates, relationship = "many-to-many") %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate = max(date)) %>%
ungroup()
counties_with_waste <- wjoined %>%
group_by(fips) %>%
mutate(obs_w_notna = sum(!is.na(mean_conc))) %>%
filter(obs_w_notna != 0) %>%
pull(fips) %>%
unique()
versions <- tibble(
version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$\\beta$ Centered at Emp. Value (Spline Smoothed)",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$\\beta$ Centered at Emp. Value (Spline Smoothed)",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
) )
)
wjoined <- wjoined %>%
left_join(versions)Correlations
Normalization Strategy 1: Standardization
w_data <- wjoined %>%
select(-date) %>%
distinct() %>%
filter(fips %in% counties_with_waste) %>%
filter(!is.na(mean_conc)) %>%
group_by(fips, version) %>%
mutate(
mean_conc_std = (mean_conc - mean(mean_conc))/
sd(mean_conc),
exp_cases_median_std = (exp_cases_median - mean(exp_cases_median))/
sd(exp_cases_median),
infections_std = (infections - mean(infections))/
sd(infections),
exp_cases_lb_std = (exp_cases_lb - mean(exp_cases_lb))/
sd(exp_cases_lb),
exp_cases_ub_std = (exp_cases_ub - mean(exp_cases_ub))/
sd(exp_cases_ub),
positive_std = (positive - mean(positive))
/sd(positive)) %>%
ungroup()PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=1.8,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y,
label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Mean Wastewater Concentration")plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=2.8,
y=.1)
w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Covidestim Estimates\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Covidestim Estimates\n(Standardized by County)",
title = "")Combined Figure
cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2, labels="auto", label_size=19)ggsave(here("figures/correlations-wastewater-by-version.pdf"))cor_covid <- covidestim_correlations %>%
select(vlabel,
`Correlation with Covidestim Estimates`=Correlation)
cor_waste <- correlations %>%
select(vlabel,
`Correlation with Wastewater Concentrations`=Correlation)
cor_waste %>%
left_join(cor_covid) %>%
mutate(vlabel = gsub(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"Priors Do Not Vary by State and Date",
vlabel, fixed=TRUE),vlabel=gsub(" at","\nat", vlabel)) %>%
arrange(desc( `Correlation with Wastewater Concentrations`)) %>%
rename(Implementation = vlabel) %>%
kbl() %>%
kable_styling(full_width = TRUE)| Implementation | Correlation with Wastewater Concentrations | Correlation with Covidestim Estimates |
|---|---|---|
| Priors Do Not Vary by State and Date | 0.8979844 | 0.9862915 |
| \(\,\)CTIS Priors that Do Not Vary by State or Date\(\,\) | 0.8976666 | 0.9861254 |
| \(P(S_1|untested)\) Centered at Emp. Value | 0.8951465 | 0.9855135 |
| \(\beta\) Centered at Emp. Value (Spline Smoothed) | 0.8921199 | 0.9825280 |
| \(P(S_1|untested)\) and \(\beta\) Centered at Emp. Values (\(\beta\) Spline Smoothed) | 0.8856435 | 0.9791947 |
| \(\beta\) Centered at Emp. Value | 0.8754553 | 0.9731110 |
| \(P(S_1|untested)\) and \(\beta\) Centered at Emp. Values | 0.8637275 | 0.9666813 |
Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v7") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized by County)",
y = "Wastewater Concentrations\n(Standardized by County)",
title = "Correlations Between Wastewater Concentrations\nand Covidestim Estimates")pb_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(positive_std,mean_conc_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1) Compare to Wastewater Over Time
library(scales)
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)
w_data <- w_data %>%
group_by(name) %>%
mutate(adjust = 1/(max(mean_conc))*(exp_cases_median[which.max(mean_conc)])) %>%
ungroup()
waste_df <- w_data %>%
select(biweek, mean_conc, name, adjust, fips) %>%
distinct() %>%
left_join(dates,relationship = "many-to-many" ) %>%
group_by(name, biweek) %>%
# center date within 2-week interval
summarize(date = min(date) + days(7),
mean_conc =unique(mean_conc),
adjust=unique(adjust),
fips = unique(fips)) %>%
mutate(name = gsub("County, MA", "", name))
pltlist <- w_data %>%
mutate(name = gsub("County, MA", "", name)) %>%
left_join(dates,relationship = "many-to-many" ) %>%
# filter(version %in% c("v7","v3")) %>%
filter(version=="v7") %>%
filter(biweek>=6) %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate=max(date)) %>%
ungroup() %>%
group_by(name) %>%
group_split() %>%
map(~ {
county_fips <- .x$fips %>% unique()
adj <- .x$adjust %>% unique()
.x %>%
# filter(fips == county_fips & date >= begin_date & date <= end_date) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5) +
geom_line(data = waste_df[which(waste_df$fips == county_fips),],
aes(x = date, y =mean_conc*adj,
color = 'Wastewater Effective Concentrations'),
# color = "#DB4048",
size = 1.1) +
geom_linerange(aes(xmin = mindate,
xmax = maxdate,
y = infections,
color = 'Covidestim Infection Estimates')) +
geom_point(data = waste_df[which(waste_df$fips == county_fips),],
aes(x = date, y =mean_conc*adj ),
color = "#DB4048",
alpha = .5,
size = 1.2) +
scale_y_continuous(sec.axis=sec_axis(~.x/adj,
name = 'Mean Wastewater Effective Concentration',
labels=comma),
labels=comma
) +
# facet_grid(name~vlabel,
# labeller=labeller(vlabel =as_labeller(
# TeX, default=label_parsed))) +
facet_wrap(~name, ncol=3) +
scale_fill_manual(values = pal, labels = c('','','',''),
name = "Probabilistic Bias Intervals") +
theme_c(legend.position="none") +
labs(y = "Value (Standardized by County)",
title = "",
x= "") +
scale_x_date(date_labels = "%b %Y") +
scale_color_manual(
name = '',
values = c(
'Covidestim Infection Estimates' = 'darkblue',
'Wastewater Effective Concentrations' = '#DB4048')) +
guides(color = guide_legend(override.aes = list(size = 12,
linewidth=3)),
fill = guide_legend(override.aes =list(size = 6)))
})
legend <- cowplot::get_legend(
pltlist[[1]] + theme(legend.position="top")
)
grid <- cowplot::plot_grid(plotlist=pltlist, ncol =3)
cowplot::plot_grid(legend, grid, nrow=2, rel_heights=c(.05,.95))ggsave(here('figures/simulation-intervals-wastewater.pdf'))Normalization Strategy 2: By Values at Biweek Where Wastewater Concentration is Maximized
w_data <- wjoined %>%
select(-date) %>%
distinct() %>%
filter(fips %in% counties_with_waste) %>%
filter(!is.na(mean_conc)) %>%
group_by(fips, version) %>%
mutate(mean_conc_std = mean_conc/ max(mean_conc),
max_conc = max(mean_conc),
exp_cases_median_std = exp_cases_median / exp_cases_median[which.max(mean_conc)],
exp_cases_lb_std = exp_cases_lb /exp_cases_median[which.max(mean_conc)],
exp_cases_ub_std = exp_cases_ub / exp_cases_median[which.max(mean_conc)],
infections_std = infections/ infections[which.max(mean_conc)],
positive_std = positive/max(positive)) %>%
ungroup()PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=1.8,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Mean Wastewater Concentration")
plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County by County)",
y = "Mean Wastewater Concentration\n(Standardized by County by County)",
title = "")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1)
w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County by County)",
y = "Covidestim Estimates\n(Standardized by County by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")
plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County by County)",
y = "Covidestim Estimates\n(Standardized by County by County)",
title = "")Combined Figure
cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)
cor_covid <- covidestim_correlations %>%
select(vlabel,
`Correlation with Covidestim Estimates`=Correlation)
cor_waste <- correlations %>%
select(vlabel,
`Correlation with Wastewater Concentrations`=Correlation)
cor_waste %>%
left_join(cor_covid) %>%
mutate(vlabel = gsub(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"Priors Do Not Vary by County and Date",
vlabel, fixed=TRUE),vlabel=gsub(" at","\nat", vlabel)) %>%
arrange(desc( `Correlation with Wastewater Concentrations`)) %>%
rename(Implementation = vlabel) %>%
kbl() %>%
kable_styling(full_width = TRUE)Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v7") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized by County by County)",
y = "Wastewater Concentrations\n(Standardized by County by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")
pb_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(positive_std,mean_conc_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1) Compare to Wastewater Over Time
library(scales)
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)
waste_df <- w_data %>%
select(biweek, mean_conc_std, name) %>%
distinct() %>%
left_join(dates,relationship = "many-to-many" ) %>%
group_by(name, biweek) %>%
# center date within 2-week interval
summarize(date = min(date) + days(7),
mean_conc_std=unique(mean_conc_std)) %>%
mutate(name = gsub("County, MA", "", name))
w_data%>%
mutate(name = gsub("County, MA", "", name)) %>%
left_join(dates,relationship = "many-to-many" ) %>%
filter(version %in% c("v7","v3")) %>%
filter(biweek>=6) %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate=max(date)) %>%
ungroup() %>%
# filter(fips == county_fips & date >= begin_date & date <= end_date) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb_std,
ymax = exp_cases_ub_std,
fill = vlabel),
alpha = .5) +
geom_line(data = waste_df,
aes(x = date, y =mean_conc_std,
color = 'Wastewater Effective Concentrations'),
# color = "#DB4048",
size = 1.1) +
geom_linerange(aes(xmin = mindate,
xmax = maxdate,
y = infections_std,
color = 'Covidestim Infection Estimates')) +
geom_point(data = waste_df,
aes(x = date, y =mean_conc_std ),
color = "#DB4048",
alpha = .5,
size = 1.2) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed))) +
scale_fill_manual(values = pal, labels = c('','','',''),
name = "Probabilistic Bias Intervals") +
theme_bw() +
theme_c() +
labs(y = "Value (Standardized by County by County)",
title = "",
x= "") +
scale_x_date(date_labels = "%b %Y") +
scale_color_manual(
name = '',
values = c(
'Covidestim Infection Estimates' = 'darkblue',
'Wastewater Effective Concentrations' = '#DB4048')) +
guides(color = guide_legend(override.aes = list(size = 12,
linewidth=3)),
fill = guide_legend(override.aes =list(size = 6)))Normalization Strategy 3: By Maximum (Separately)
For each county, normalize the wastewater concentration by the maximum concentration for that county and the probabilistic bias counts by the maximum median estimated counts for that county (2.5th percentile and 97.5th percentile also standardized by the maximum median estimated counts for that county).
PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=1.8,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Mean Wastewater Concentration")
plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1)
w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Covidestim Estimates\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")
plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized by County)",
y = "Covidestim Estimates\n(Standardized by County)",
title = "")Combined Figure
cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v7") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized by County)",
y = "Wastewater Concentrations\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")
pb_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(positive_std,mean_conc_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1) Simulation Intervals Over Time
county_dat <- county %>%
# set Nantucket, Duke fips to Nantucket since we have wastewater data
# for Nantucket
left_join(dates, relationship = "many-to-many") %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate = max(date)) %>%
ungroup() %>%
left_join(county_names, by=c('fips'='fips')) %>%
left_join(versions) %>%
mutate(name = ifelse(fips =="25007,25019", "Nantucket and Dukes", name))
county_dat %>%
filter(version=="v7") %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
alpha = .5,
show.legend=FALSE,
fill="#596281") +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_wrap(~name, scales="free_y", ncol=3) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
axis.text.x=element_text(size=7),
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "Counties in Massachusetts") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))ggsave(here('figures/simulation-intervals-county.pdf'))county_dat %>%
filter(fips %in% sample(unique(.data$fips),2)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
alpha = .5,
show.legend=FALSE,
fill="#596281") +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "Counties in Massachusetts") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))Ratio of Estimated Infections to Observed
county_dat %>%
filter(version=="v7") %>%
#filter(grepl("Nan",name)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb/positive,
ymax = exp_cases_ub/positive),
alpha = .5,
show.legend=FALSE,
fill="#596281") +
# geom_linerange(aes(xmin = mindate,
# xmax=maxdate,
# y = positive,
# color = 'obs')) +
facet_wrap(~name,ncol=3) +
# facet_grid(name~vlabel,
# labeller=labeller(vlabel =as_labeller(
# TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top") +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Ratio of Estimated to Observed Over Time",
subtitle = "Counties in Massachusetts") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))Trends in Total Tests and Positive Tests
Informs our understanding of the trends in the ratio of estimated to observed infections.
Positive Tests and Test Positivity
############################################
# POSITIVE TESTS AND TEST POSITIVITY
############################################
pltlist <- county_dat %>%filter(version=="v7") %>%
mutate(testpos = positive/total) %>%
group_by(across(-date)) %>%
summarize(date=min(date) + days(7)) %>%
group_by(fips) %>%
mutate(adj = (1/max(testpos)) * max(positive)) %>%
group_split() %>%
map(~ {
adj <-unique(.x$adj)
.x %>%
ggplot(aes(x=date))+
geom_line(aes(y=positive, color = 'Positive Tests')) +
geom_point(aes(y=positive, color = 'Positive Tests'),
alpha=.5,
size=.8) +
geom_line(aes(y=testpos*adj, color='Test Positivity'),
linetype=2) +
geom_point(aes(y=testpos*adj, color='Test Positivity'),
alpha=.5,
size=.8) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c(
"Positive Tests"= "#279143",
"Test Positivity"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,2),
linewidth=c(2,2)))) +
theme_c() +
theme(axis.title = element_text(size=10),
axis.text =element_text(size=9),
legend.position="none") +
scale_y_continuous(labels=comma, sec.axis = sec_axis(~./adj , name = 'Test Positivity')) +
labs(color ='',
y="Positive Tests",
x= "Date") +
facet_wrap(~name)
})
legend <- get_legend(
pltlist[[1]] +
theme(legend.position = "top",
legend.text=element_text(size=16))
)
title_gg <- ggplot() +
labs(title = "Comparing Trends in Positive Tests and Test Positivity") +
theme(plot.title=element_text(face="bold",
hjust = .5,
size = 18,
margin =margin(5,0,2,0)))
plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3)
plot_grid(title_gg, legend, plt, rel_heights=c(.03, .03,.94), ncol=1)Total Tests and Positive Tests
#####################################
# TOTAL TESTS AND POSITIVE TESTS
#####################################
pltlist <- county_dat %>%filter(version=="v7") %>%
mutate(testpos = positive/total) %>%
group_by(across(-date)) %>%
summarize(date=min(date) + days(7)) %>%
group_by(fips) %>%
mutate(adj = (1/max(total)) * max(positive)) %>%
group_split() %>%
map(~ {
adj <-unique(.x$adj)
.x %>%
ggplot(aes(x=date))+
geom_line(aes(y=positive, color = 'Positive Tests')) +
geom_point(aes(y=positive, color = 'Positive Tests'),
alpha=.5,
size=.8) +
geom_line(aes(y=total*adj, color='Total Tests'),
linetype=2) +
geom_point(aes(y=total*adj, color='Total Tests'),
alpha=.5,
size=.8) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c(
"Positive Tests"= "#279143",
"Total Tests"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,2)))) +
theme_c() +
theme(axis.title = element_text(size=11),
axis.text =element_text(size=8),
legend.position="none") +
scale_y_continuous(labels=label_scientific(),
n.breaks=4,
sec.axis = sec_axis(~./adj ,
name = 'Total Tests',
labels=label_scientific())) +
labs(color ='',
y="Positive Tests",
x= "Date") +
facet_wrap(~name)
})
legend <- cowplot::get_legend(
pltlist[[1]] +
theme(legend.position="top") +
guides(color = guide_legend(keywidth=2,
override.aes = list(linewidth=c(2,2),
linetype =c(1,2))))
)
title_gg <- ggplot() +
labs(title = "Comparing Trends in Total Tests and Positive Tests") +
theme(plot.title=element_text(face="bold",
hjust = .5,
size = 18,
margin =margin(5,0,2,0)))
plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3)
cowplot::plot_grid(title_gg, legend, plt, rel_heights =c(.03, .03,.94),ncol=1) Percent Change in Total Tests and Positive Tests
###################################################
# PERCENT CHANGE: TOTAL TESTS AND POSITIVE TESTS
###################################################
county_dat %>%
filter(version=="v7") %>%
filter(!grepl("Nantucket",name)) %>%
mutate(testpos = positive/total) %>%
group_by(across(-date)) %>%
summarize(date=min(date) + days(7)) %>%
group_by(fips) %>%
arrange(biweek) %>%
mutate(pct_change_positive = (positive - lag(positive, 1))/lag(positive, 1),
pct_change_total = (total - lag(total, 1))/lag(total, 1)) %>%
ggplot(aes(x=date))+
geom_line(aes(y=pct_change_positive, color = 'Positive Tests')) +
geom_point(aes(y=pct_change_positive, color = 'Positive Tests'),
alpha=.5,
size=.8) +
geom_line(aes(y=pct_change_total, color='Total Tests'),
linetype=2) +
geom_point(aes(y=pct_change_total, color='Total Tests'),
alpha=.5,
size=.8) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c(
"Positive Tests"= "#279143",
"Total Tests"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,2)))) +
theme_c() +
theme(axis.title = element_text(size=15),
axis.text.x=element_text(size=9),
legend.position="top") +
# scale_y_continuous(labels=comma, sec.axis = sec_axis(~./adj , name = 'Test Positivity')) +
labs(color ='',
y="Percent Change",
x= "Date",
title = "Percent Change in Positive Tests and Total Tests") +
facet_wrap(~name, ncol=3, scales="free_x") +
scale_y_continuous(limits = c(-1,5.5),
labels=percent) +
scale_x_date(date_breaks="3 months", date_labels="%b %Y")Concordance with Covidestim
wjoined <- wjoined %>%
mutate(name = gsub("County, MA","", name))
ma_concordance <- wjoined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", name )) %>%
select(-date) %>%
distinct() %>%
select(exp_cases_lb, exp_cases_ub,
name, biweek, vlabel, infections) %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections &
infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(vlabel, in_interval) %>%
summarize(n_per_version = sum(n_per_county)) %>%
group_by(vlabel) %>%
mutate(total = sum(n_per_version)) %>%
ungroup() %>%
mutate(prop=n_per_version/total)
ma_concordance_by_county <- wjoined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", name )) %>%
select(-date) %>%
mutate(name = gsub("$", "",
name, fixed=TRUE)) %>%
select(exp_cases_lb, exp_cases_ub,
name, biweek, vlabel, infections) %>%
distinct() %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections &
infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(name,vlabel) %>%
mutate(total = sum(n_per_county),
prop = n_per_county/total) %>%
mutate(state="MA") %>%
ungroup()Concordance by County
##############################################
# stacked bar chart of concordance by county
##############################################
ma_concordance_by_county %>%
filter(vlabel=="$P(S_1|untested)$ Centered at Emp. Value") %>%
group_by(name) %>%
mutate(m = prop[which(in_interval=="In Interval")]) %>%
ungroup() %>%
mutate(in_interval = factor(in_interval, levels = c("Above Interval",
"In Interval", "Below Interval"))) %>%
ggplot(aes(x= fct_reorder(name,m),
fill = (in_interval), y =prop)) +
geom_bar(stat="identity",position="stack") +
theme_c() +
coord_flip()+
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Massachusetts: Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = TeX("For Implementation with $P(S_1|untested)$ Centered at Survey Value")) +
theme_c() +
theme(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1))Concordance by Version
#
in_interval <- ma_concordance %>%
filter(in_interval == "In Interval") %>%
mutate(n_interval = n_per_version) %>%
select(vlabel, n_interval)
labels <- ma_concordance %>%
filter(in_interval=="In Interval") %>%
arrange((prop)) %>%
pull(vlabel) %>%
unique() %>%
as.character()
#########################################################
# CONCORDANCE BY VERSION
#########################################################
ma_concordance %>%
left_join(in_interval) %>%
mutate(in_interval=factor(in_interval,
levels=c("Above Interval",
"In Interval",
"Below Interval"))) %>%
ggplot(aes(y =fct_reorder(vlabel,n_interval),
x = prop,
fill=in_interval)) +
#facet_wrap(~fct_reorder((vlabel),m), ncol=1) +
geom_bar(stat="identity",position="stack", color="darkgray", linewidth=.2) +
theme_c(axis.text.y = element_text(size = 14, hjust=1),
axis.title.x = element_text(size=14),
axis.text.x=element_text(size=10),
legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt')) +
scale_y_discrete(labels=unname(TeX(labels))) +
labs(y ="",
x="Proportion Containing\nthe Covidestim Median",
fill = "Covidestim Median:",
title = "Where Covidestim Medians Fall\nRelative to Probabilistic Bias Intervals",
subtitle = "By Implementation, in Massachusetts") +
scale_fill_manual(values=c("Below Interval"="#7997D2",
"In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79"),
breaks = c("Below Interval",
"In Interval",
"Above Interval")) +
guides(fill=guide_legend(byrow=TRUE)) +
scale_x_continuous(n.breaks = 7,
expand=c(0,0),
limits=c(-.05,1))ggsave(here('figures/concordance-covidestim-county.pdf'))ma_concordance %>%
group_by(vlabel) %>%
mutate(total=sum(n_per_version)) %>%
filter(in_interval == "In Interval") %>%
mutate(n_interval = n_per_version) %>%
ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
geom_text(aes(label=round(prop,3), x= prop+.03),
angle=0) +
geom_bar(stat="identity", fill="#596281") +
scale_y_discrete(breaks= unique(ma_concordance$vlabel),
labels=function(x)TeX(x)) +
scale_x_continuous(expand=c(0,0),
limits=c(0,.9), n.breaks=7) +
theme_c(axis.text.y = element_text(size = 13, hjust=1),
axis.title.x = element_text(size=14),
axis.text.x=element_text(size=10)) +
labs(y="",
x="Proportion Containing the Covidestim Median",
title="Proportion Containing the Covidestim Median",
subtitle = "By Implementation")Summary of Comparison Between Implementations
Although the spline-smoothing improved the correlations with wastewater concentrations and the correlations with Covidestim estimates, it did not improve the proportion containing the Covidestim median.
The implementation centering \(\Pr(S_1|\text{untested})\) at the survey value along with the implementation that does not vary the priors by state or date are the most correlated with Covidestim estimates, wastewater concentrations, and have the highest proportion of county-biweeks containing the Covidestim median.